library(readxl)
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
rm(list=ls())
tdata <- read_excel("../data/tuoye.xlsx", col_names = TRUE, row.names(TRUE) )
## New names:
## • `` -> `...1`
xdata <- read_excel("../data/xueqing.xlsx", col_names = TRUE, row.names(TRUE) )
## New names:
## • `` -> `...1`
检查正态性 Shapiro-Wilk 检验 p < 0.05证明不是正态分布
唾液数据
#创建因子列表
factors <- colnames(tdata)[3:(ncol(tdata)-3)]
#对因子正态性逐个检验
normality <- data.frame(Factor = character(), Shapiro_p_value = numeric(), stringsAsFactors = FALSE)
# 遍历每个因子,进行Shapiro-Wilk检验
for (factor in factors) {
test_result <- shapiro.test(tdata[[factor]]) # 对每个因子进行正态性检验
normality <- rbind(normality, data.frame(Factor = factor, Shapiro_p_value = test_result$p.value))
}
normality
## Factor Shapiro_p_value
## 1 MCP1 0.895041143
## 2 TNFa 0.597066226
## 3 CASP1 0.106085918
## 4 LDH 0.437689562
## 5 GM-CSF 0.267332512
## 6 MIP1a 0.009081237
## 7 HIF1a 0.027476154
## 8 ALP 0.180940921
## 9 BMP2 0.042243836
## 10 VEGFA 0.061479671
## 11 NTXI 0.006868238
## 12 COX2 0.011428387
## 13 MPO 0.239740903
## 14 IL6 0.109711119
## 15 IL17 0.034172419
## 16 IL1b 0.380964534
## 17 CTSK 0.115337539
## 18 MMP9 0.008590777
## 19 PECAM1 0.005958055
## 20 CAT 0.104453106
## 21 MMP-8 0.067476021
not_normal_tfactors <- subset(normality, Shapiro_p_value < 0.05)
not_normal_tfactors
## Factor Shapiro_p_value
## 6 MIP1a 0.009081237
## 7 HIF1a 0.027476154
## 9 BMP2 0.042243836
## 11 NTXI 0.006868238
## 12 COX2 0.011428387
## 15 IL17 0.034172419
## 18 MMP9 0.008590777
## 19 PECAM1 0.005958055
write.csv(not_normal_tfactors, "../result/tdata_not_normal.csv", row.names = FALSE)
血清数据
#对因子正态性逐个检验
normality <- data.frame(Factor = character(), Shapiro_p_value = numeric(), stringsAsFactors = FALSE)
# 遍历每个因子,进行Shapiro-Wilk检验
for (factor in factors) {
test_result <- shapiro.test(xdata[[factor]]) # 对每个因子进行正态性检验
normality <- rbind(normality, data.frame(Factor = factor, Shapiro_p_value = test_result$p.value))
}
normality
## Factor Shapiro_p_value
## 1 MCP1 0.0240963557
## 2 TNFa 0.0025059918
## 3 CASP1 0.0015234526
## 4 LDH 0.0156311860
## 5 GM-CSF 0.0238918342
## 6 MIP1a 0.0065652202
## 7 HIF1a 0.0041192518
## 8 ALP 0.0074922249
## 9 BMP2 0.0055832873
## 10 VEGFA 0.0299140426
## 11 NTXI 0.0016756912
## 12 COX2 0.0042097370
## 13 MPO 0.0077907045
## 14 IL6 0.0006423363
## 15 IL17 0.0097113058
## 16 IL1b 0.0021889971
## 17 CTSK 0.0016493802
## 18 MMP9 0.0120477205
## 19 PECAM1 0.0004854068
## 20 CAT 0.1358007572
## 21 MMP-8 0.0158732642
not_normal_xfactors <- subset(normality, Shapiro_p_value < 0.05)
not_normal_xfactors
## Factor Shapiro_p_value
## 1 MCP1 0.0240963557
## 2 TNFa 0.0025059918
## 3 CASP1 0.0015234526
## 4 LDH 0.0156311860
## 5 GM-CSF 0.0238918342
## 6 MIP1a 0.0065652202
## 7 HIF1a 0.0041192518
## 8 ALP 0.0074922249
## 9 BMP2 0.0055832873
## 10 VEGFA 0.0299140426
## 11 NTXI 0.0016756912
## 12 COX2 0.0042097370
## 13 MPO 0.0077907045
## 14 IL6 0.0006423363
## 15 IL17 0.0097113058
## 16 IL1b 0.0021889971
## 17 CTSK 0.0016493802
## 18 MMP9 0.0120477205
## 19 PECAM1 0.0004854068
## 21 MMP-8 0.0158732642
write.csv(not_normal_xfactors, "../result/xdata_not_normal.csv", row.names = FALSE)
最终得到血清数据中除了CAT都是非正态数据 唾液数据中非正态数据包括:MIP1a, HIF1a, BMP2, NTXI, COX2, IL17, MMP9, PECAM1
正态分布数据使用Pearson 相关系数,非正态分布的数据使用Spearman 相关系数
相关性矩阵
# 正态分布因子
cor_matrix_BVTV_t <- cor(tdata[, c("BV/TV", "MCP1" , "TNFa" , "CASP1" , "LDH" , "GM-CSF" , "ALP" , "VEGFA" , "MPO" , "IL6" , "IL1b" , "CTSK" , "MMP-8", "CAT" )], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_BVTV_t, "../result/cor_matrix/bvtv_t.csv",row.names = TRUE)
# 非正态分布因子
cor_matrix_spearman_BVTV_t <- cor(tdata[, c("BV/TV", "MIP1a" , "HIF1a" , "BMP2" , "NTXI" , "COX2" , "IL17" , "MMP9" , "PECAM1" )], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_BVTV_t, "../result/cor_matrix/bvtv_t_non_normal.csv", row.names = TRUE)
tdata$Tb.Sp
## [1] 0.2025 0.2051 0.2088 0.1444 0.2114 0.2678 0.3539 0.3950 0.3320 0.3524
## [11] 0.3554 0.4678 0.6661 0.6473 0.5995 0.5837 0.6609 0.6045
可视化
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_BVTV_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_BVTV_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
#corrplot(cor_matrix_spearman_BVTV_t, method = "circle", type = "upper", addCoef.col = "black", order = #"hclust", tl.col = "black", tl.srt = 45)
相关性显著性检验
normal_tfactors <- c("MCP1", "TNFa", "CASP1", "LDH", "GM-CSF", "ALP", "VEGFA", "MPO", "IL6", "IL1b", "CTSK", "MMP-8", "CAT")
non_normal_tfactors <- c("MIP1a", "HIF1a", "BMP2", "NTXI", "COX2", "IL17", "MMP9", "PECAM1")
cor_test_normal_BVTV_t <- data.frame(Factor = character(),
Pearson_p_value = numeric(),
stringsAsFactors = FALSE)
# 对每个正态分布的因子进行 Pearson 相关性检验
for (factor in normal_tfactors) {
test_result <- cor.test(tdata$`BV/TV`, tdata[[factor]], method = "pearson")
cor_test_normal_BVTV_t <- rbind(cor_test_normal_BVTV_t, data.frame(Factor = factor, Pearson_p_value = test_result$p.value))
}
cor_test_non_normal_bvtv_t <- data.frame(Factor = character(),
Spearman_p_value = numeric(),
stringsAsFactors = FALSE)
# 对每个非正态分布的因子进行 Spearman 相关性检验
for (factor in non_normal_tfactors) {
test_result <- cor.test(tdata$`BV/TV`, tdata[[factor]], method = "spearman")
cor_test_non_normal_bvtv_t <- rbind(cor_test_non_normal_bvtv_t, data.frame(Factor = factor, Spearman_p_value = test_result$p.value))
}
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
cor_test_normal_BVTV_t
## Factor Pearson_p_value
## 1 MCP1 0.9463708
## 2 TNFa 0.8909695
## 3 CASP1 0.9173662
## 4 LDH 0.8554642
## 5 GM-CSF 0.7574734
## 6 ALP 0.5222242
## 7 VEGFA 0.3835144
## 8 MPO 0.4025683
## 9 IL6 0.3422514
## 10 IL1b 0.7704817
## 11 CTSK 0.3104577
## 12 MMP-8 0.5253370
## 13 CAT 0.7109149
cor_test_non_normal_bvtv_t
## Factor Spearman_p_value
## 1 MIP1a 0.9252868
## 2 HIF1a 0.8097502
## 3 BMP2 0.8160931
## 4 NTXI 0.9285272
## 5 COX2 0.8994083
## 6 IL17 0.4641971
## 7 MMP9 0.9707317
## 8 PECAM1 0.8736259
write.csv(cor_test_normal_BVTV_t, "../result/cor_test_bvtv_t.csv", row.names = FALSE)
write.table(cor_test_non_normal_bvtv_t, "../result/cor_test_bvtv_t.csv", append = TRUE, sep = ",", col.names = TRUE, row.names = FALSE)
## Warning in write.table(cor_test_non_normal_bvtv_t,
## "../result/cor_test_bvtv_t.csv", : appending column names to file
cor_matrix_bvtv_x <- cor(tdata[, c("BV/TV", "CAT" )], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_bvtv_x, "../result/cor_matrix/bvtv_x.csv",row.names = TRUE)
# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_BVTV_x <- cor(xdata[, c("BV/TV", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_BVTV_x, "../result/cor_matrix/bvtv_x_non_normal.csv", row.names = TRUE)
corrplot(cor_matrix_bvtv_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
corrplot(cor_matrix_spearman_BVTV_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_bvtv_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_BVTV_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# 正态分布因子
cor_matrix_tbsp_t <- cor(tdata[, c("Tb.Sp", "MCP1" , "TNFa" , "CASP1" , "LDH" , "GM-CSF" , "ALP" , "VEGFA" , "MPO" , "IL6" , "IL1b" , "CTSK" , "MMP-8", "CAT" )], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_tbsp_t, "../result/cor_matrix/tbsp_t.csv",row.names = TRUE)
# 非正态分布因子
cor_matrix_spearman_tbsp_t <- cor(tdata[, c("Tb.Sp", "MIP1a" , "HIF1a" , "BMP2" , "NTXI" , "COX2" , "IL17" , "MMP9" , "PECAM1")], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_tbsp_t, "../result/cor_matrix/tbsp_t_non_normal.csv", row.names = TRUE)
corrplot(cor_matrix_tbsp_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
corrplot(cor_matrix_spearman_tbsp_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_tbsp_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_tbsp_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
cor_matrix_tbsp_x <- cor(tdata[, c("Tb.Sp", "CAT" )], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_tbsp_x, "../result/cor_matrix/tbsp_x.csv",row.names = TRUE)
# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_tbsp_x <- cor(xdata[, c("Tb.Sp", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_tbsp_x, "../result/cor_matrix/tbsp_x_non_normal.csv", row.names = TRUE)
corrplot(cor_matrix_tbsp_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
corrplot(cor_matrix_spearman_tbsp_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_tbsp_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_tbsp_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# 正态分布因子
cor_matrix_m1_t <- cor(tdata[, c("M1", "MCP1" , "TNFa" , "CASP1" , "LDH" , "GM-CSF" , "ALP" , "VEGFA" , "MPO" , "IL6" , "IL1b" , "CTSK" , "MMP-8" , "CAT")], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_m1_t, "../result/cor_matrix/m1_t.csv",row.names = TRUE)
# 非正态分布因子
cor_matrix_spearman_m1_t <- cor(tdata[, c("M1", "MIP1a" , "HIF1a" , "BMP2" , "NTXI" , "COX2" , "IL17" , "MMP9" , "PECAM1" )], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_m1_t, "../result/cor_matrix/m1_t_non_normal.csv", row.names = TRUE)
corrplot(cor_matrix_m1_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
corrplot(cor_matrix_spearman_m1_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_m1_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_m1_t,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
cor_matrix_m1_x <- cor(tdata[, c("M1", "CAT" )], use = "complete.obs", method = "pearson")
write.csv(cor_matrix_m1_x, "../result/cor_matrix/m1_x.csv",row.names = TRUE)
# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_m1_x <- cor(xdata[, c("M1", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")
write.csv(cor_matrix_spearman_m1_x, "../result/cor_matrix/m1_x_non_normal.csv", row.names = TRUE)
corrplot(cor_matrix_m1_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
corrplot(cor_matrix_spearman_m1_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_m1_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序
# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_m1_x,
upper = "circle", # 上半部分显示相关性图
lower = "number", # 下半部分显示相关性系数
number.cex = 0.7, # 控制数字大小
tl.col = "black", # 标签颜色
tl.srt = 45, # 标签旋转角度
tl.pos = "lt", # 因子名字放在顶部
order = "hclust") # 聚类排序